'''
http://www.banana-pi.org.cn
water
2017-12-01
Download Banana Pi product details
'''

import os
import codecs
import requests

# Local directory that receives the page cache and every downloaded file.
root = "D:/Pi"
# Base URL of the site; relative links found in the pages are appended to it.
home = "http://www.banana-pi.org.cn/"
# Index page, relative to `home`, where the crawl starts.
default = "product.html"

def find_str_between(l, s_s, s_e):
    """Return the text of ``l`` between the first ``s_s`` and the next ``s_e``.

    Args:
        l: The string to search.
        s_s: Start marker; the result begins right after its first occurrence.
        s_e: End marker, searched for after the start marker.  An empty
            string means "no end marker".

    Returns:
        The enclosed text.  ``""`` when ``s_s`` does not occur in ``l``.
        Everything after ``s_s`` when ``s_e`` is empty or does not occur
        after the start marker.
    """
    i_s = l.find(s_s)
    if i_s == -1:
        return ""
    s = l[i_s + len(s_s):]
    if s_e == "":
        return s
    i_e = s.find(s_e)
    if i_e == -1:
        # Missing end marker: slicing with -1 would silently drop the last
        # character, so return the whole remainder instead.
        return s
    return s[:i_e]

def html2txt(s):
    """Return ``s`` with every ``<...>`` tag removed.

    Tags are handled left to right: the first ``<`` and the next ``>`` after
    it delimit a tag, and every occurrence of that exact tag is deleted
    before the scan continues.

    Args:
        s: Text that may contain HTML tags.

    Returns:
        The text without tags.  A ``<`` that is never closed by ``>`` is
        left in place together with everything after it.
    """
    i_open = s.find('<')
    while i_open != -1:
        i_close = s.find('>', i_open + 1)
        if i_close == -1:
            # Unclosed "<" (e.g. a literal less-than sign): nothing more can
            # be stripped, and retrying would never terminate.
            break
        # Remove this tag wherever it occurs, including the empty tag "<>".
        s = s.replace(s[i_open:i_close + 1], '')
        # Text before i_open contains no "<", so resume the scan from there.
        i_open = s.find('<', i_open)
    return s

def down_img(href, file):
    """Download ``href`` to the local path ``file`` unless it already exists.

    A failed download or write is reported on stdout and otherwise ignored,
    so one bad image does not abort the caller.

    Args:
        href: URL of the image to fetch.
        file: Destination path; missing parent directories are created.
    """
    folder = os.path.dirname(file)
    # A bare file name has no directory part, and os.makedirs("") would raise.
    if folder:
        os.makedirs(folder, exist_ok=True)

    if os.path.exists(file):
        return

    try:
        r = requests.get(href, timeout=30)
        # Do not store an HTTP error page under the image's name.
        r.raise_for_status()
        with open(file, "wb") as f:
            f.write(r.content)
    except (requests.RequestException, OSError):
        print("\tError: %s" % file)

def getpage(url):
    """Return the text of ``url``, served from a file cache when possible.

    Pages are cached as UTF-8 files in ``<root>/temp``; the cache file name
    is the URL with ``/`` replaced by ``-`` and ``:`` replaced by ``_``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text, or ``""`` when the download, decoding or cache write
        fails (the failure is reported on stdout).  Failed responses are
        not cached.
    """
    path = "%s/temp" % root
    os.makedirs(path, exist_ok=True)
    fn = "%s/%s" % (path, url.replace("/", "-").replace(":", "_"))

    if os.path.exists(fn):
        with codecs.open(fn, "r", "utf-8") as f:
            return f.read()

    try:
        r = requests.get(url, timeout=30)
        # An HTTP error page must not end up in the cache as if it were
        # the real page.
        r.raise_for_status()
        s = r.content.decode("utf-8")
        with codecs.open(fn, "w", "utf-8") as f:
            f.write(s)
    except (requests.RequestException, OSError, UnicodeError):
        print("\terror! %s" % url)
        return ""
    return s

def downdetail(url):
    """Scrape one product page into the directory ``<root>/BPI-<name>``.

    The page is fetched through getpage() and scanned line by line:

    * The ``<title>`` text, the ``<h4 class="light grey-text ...">`` heading
      and the lines following ``<p class="flow-text">`` (up to a line with
      ``</div>`` or ``<ul``) are collected, stripped of tags and written to
      ``notes.txt``.
    * Every ``<img class="responsive-img" ...>`` is downloaded with
      down_img() as ``bpi_<file name>``.
    * ``<td>`` cells inside ``<table class="striped">`` are paired two at a
      time and written to ``detail.txt`` as aligned rows.

    Both text files are written when the line containing ``</html>`` is
    reached.  Nothing is written if the page cannot be fetched.

    Args:
        url: Full URL of the product page.
    """
    # Directory name: the URL without the site prefix and without its last
    # five characters (".html" on the product links this script follows).
    t1 = url.replace(home, "")[:-5]
    path = "%s/BPI-%s" % (root, t1)
    os.makedirs(path, exist_ok=True)

    s = getpage(url)
    if s == "":
        return
    # Join a "<td>" that is directly followed by a line break with the next
    # line, so the cell text sits on the line that carries the "<td>".
    s = s.replace('<td>\n', '<td>')

    # Bound up front so a page without a <head> line cannot hit an unbound
    # name further down.
    s_txt = ""
    s_inf = ""
    s_c1 = ""
    b_detail = False  # inside the free-text description
    b_table = False   # inside the striped table
    i_c = 0           # 0: next cell is the first of a pair, 1: the second
    for l in s.split('\n'):
        if '<head>' in l:
            s_txt = ""
            s_inf = ""
        if '<title>' in l:
            s_txt = find_str_between(l, '>', '<')
        if '<h4 class="light grey-text' in l:
            s_txt += "\n" + find_str_between(l, '>', '<')

        # The end-of-description check runs before the collection step so
        # the terminating line itself is not collected; the start check runs
        # after it so the opening <p> line is not collected either.
        if '</div>' in l or '<ul' in l:
            b_detail = False
        if b_detail:
            txt = l.strip()
            # Skip lines that only close the paragraph.
            if txt != '</p>':
                s_txt += "\n" + txt
        if '<p class="flow-text">' in l:
            b_detail = True

        if '<img class="responsive-img"' in l:
            src = find_str_between(l, 'src="', '"')
            s_href = home + src
            s_file = path + "/bpi_" + os.path.basename(s_href)
            down_img(s_href, s_file)

        if '</table>' in l:
            b_table = False
        if b_table and "<td>" in l:
            cell = l.replace("<td>", "").strip()
            if i_c == 0:
                s_c1 = cell
                i_c = 1
            else:
                s_inf += "%-30s %s\n" % (s_c1, cell)
                i_c = 0
        if '<table class="striped">' in l:
            b_table = True
            i_c = 0

        if '</html>' in l:
            with codecs.open(path + '/notes.txt', 'w', 'utf-8') as f:
                f.write(html2txt(s_txt))
            with codecs.open(path + '/detail.txt', 'w', 'utf-8') as f:
                f.write(s_inf)

def down():
    """Crawl the product index page and download every linked product.

    The index page (``home + default``) is fetched through getpage() and
    scanned line by line.  The most recent ``"card-title"`` text and ``<p>``
    text are remembered; each line containing the link caption '了解更多'
    ("learn more") yields one product, which is printed, recorded and
    passed to downdetail().  The recorded rows are written to
    ``<root>/index_bpi.txt`` at the end.  Nothing is written if the index
    page cannot be fetched.
    """
    os.makedirs(root, exist_ok=True)

    s = getpage(home + default)
    if s == "":
        return

    # Defaults so a link line that precedes any title or description line
    # does not raise NameError.
    tit = ""
    con = ""
    rows = []
    for l in s.split('\n'):
        if '"card-title"' in l:
            tit = find_str_between(l, '">', '<')
        if '<p>' in l:
            con = find_str_between(l, '>', '')
        if '了解更多' in l:
            href = find_str_between(l, 'href="', '"')
            row = "%-25s %-50s %s" % (tit, home + href, con)
            rows.append(row + "\n")
            print(row)
            downdetail(home + href)

    with codecs.open(root + '/index_bpi.txt', 'w', 'utf-8') as f:
        f.write("".join(rows))

# Start the crawl only when the file is executed as a script, not on import.
if __name__ == "__main__":
    down()